* This is the do file to create "Table D2. Summary Statistics by Attrition Status" and "Table D3. Summary Statistics by Attrition Status and Treatment Status"
* ---- Setup: seed, data, covariates --------------------------------------
* The seed is fixed before any of the resampling commands used further down.
set seed 123

use "$path_data/temp/student_unbalance", clear

* 0/1 indicators. An observation whose source value is missing is coded 0.
gen female = (student_gender == 0)
gen grade_2 = (grade == 2)

* tracked is the complement of the attrition indicator.
gen tracked = 1 - attrition

* Standardize each baseline score using the mean and standard deviation
* computed over every observation currently in memory.
foreach score in DT_score_pre cpcs_pre rosen_pre {
	egen `score'_mean = mean(`score')
	egen `score'_sd = sd(`score')
	gen `score'_std = (`score' - `score'_mean) / `score'_sd
	drop `score'_mean `score'_sd
}

* Observations without a school number are pooled under the code 999,
* so they form one cluster in the clustered commands below.
replace school_no = 999 if school_no == .


* ---- Descriptive statistics by attrition status -------------------------
* For each statistic (n, mean, sd) tabstat is run once on tracked students
* (tracked == 1) and once on attrited students (tracked == 0). The saved
* r(StatTotal) row vector is stored as <stat>_tr_bl and <stat>_ct_bl.
local covars DT_score_pre_std rosen_pre_std cpcs_pre_std female grade_2

foreach stat in n mean sd {
	tabstat `covars' if tracked == 1, statistics(`stat') save
	matrix list r(StatTotal)
	matrix `stat'_tr_bl = r(StatTotal)

	tabstat `covars' if tracked == 0, statistics(`stat') save
	matrix list r(StatTotal)
	matrix `stat'_ct_bl = r(StatTotal)
}

* Total number of non-missing observations per covariate (tracked + attrited).
matrix n_bl = J(1,5,.)
forvalues col = 1/5 {
	matrix n_bl[1,`col'] = n_tr_bl[1,`col'] + n_ct_bl[1,`col']
}

* ---- Wild cluster bootstrap: tracked vs. attrited -----------------------
* Each covariate is regressed on the tracked indicator with clustering on
* school_no and 1000 bootstrap replications. The returned r(table) is kept
* as r2_<var>_temp, and four 1 x 2 matrices (mean, se, pv, obs) are created
* filled with missing values. Only mean and pv are populated afterwards;
* se and obs stay missing.
* NOTE(review): row 1 of r(table) is used as the coefficient and row 3 as
* the p-value -- confirm this row layout against the wildbootstrap
* documentation for the installed Stata version.
local covars DT_score_pre_std rosen_pre_std cpcs_pre_std female grade_2

foreach y of local covars {

	wildbootstrap regress `y' tracked, cluster(school_no) reps(1000)
	matrix r2_`y'_temp = r(table)

	foreach piece in mean se pv obs {
		matrix r2_`y'_`piece' = J(1,2,.)
	}

	forvalues col = 1/2 {
		* coefficient
		matrix r2_`y'_mean[1,`col'] = r2_`y'_temp[1,`col']
		* p-value
		matrix r2_`y'_pv[1,`col'] = r2_`y'_temp[3,`col']
	}
}


* ---- Significance stars from the bootstrap p-values ---------------------
* For every outcome, the local star_<var> receives a file-write fragment
* (a string format followed by a quoted run of asterisks) chosen from the
* p-value in r2_<var>_pv[1,1]:
*   p <= 0.01 -> three stars, p <= 0.05 -> two, p <= 0.10 -> one,
*   otherwise (including a missing p-value) the local is left empty.
local outcome DT_score_pre_std rosen_pre_std cpcs_pre_std female grade_2

tempname pval
foreach dep of local outcome {
	scalar `pval' = r2_`dep'_pv[1,1]

	if `pval' <= 0.01 {
		local star_`dep' %3s "***"
	}
	else if `pval' <= 0.05 {
		local star_`dep' %2s "**"
	}
	else if `pval' <= 0.10 {
		local star_`dep' %1s "*"
	}
	else {
		local star_`dep' ""
	}
}

* ---- Romano-Wolf p-values for the attrition comparison ------------------
* rwolf is run jointly on the five covariates with attrition as the
* regressor of interest, clustering on school_no. The e(rw_<var>) results
* are copied to the scalars rwolf_p1 ... rwolf_p5 in covariate order.
local covars DT_score_pre_std rosen_pre_std cpcs_pre_std female grade_2

rwolf `covars', indepvar(attrition) reps(1000) cluster(school_no) vce(cluster school_no)

local k = 0
foreach v of local covars {
	local ++k
	scalar rwolf_p`k' = e(rw_`v')
}


* ---- Table D2: Summary Statistics by Attrition Status --------------------
* Writes the LaTeX table to summary_stat_baseline_attrition.tex.
* Each covariate occupies three rows:
*   1. group means, the tracked-coefficient estimate with stars, and N
*   2. group standard deviations in [ ] and the bootstrap p-value in ( )
*   3. the Romano-Wolf p-value in { }
* The star_<var> locals expand to a format directive plus a quoted string
* and are therefore placed unquoted inside the file write argument list.
*
* Changes relative to the previous version of this section:
*   - the superscript marker on row labels is written with \textsuperscript
*     because a bare ^ in text mode raises a LaTeX "Missing $ inserted" error;
*   - note (e) uses ASCII asterisks instead of the Unicode asterisk operator,
*     which plain pdfLaTeX cannot typeset.
tempname hh2
file open `hh2' using "$path_output/summary_stat_baseline_attrition.tex", write replace
file write `hh2' "" _newline
file write `hh2' "% Author: Kazuma Takakura" _newline
file write `hh2' "% Date: `c(current_date)'" _newline
file write `hh2' "% Time: `c(current_time)'" _newline
file write `hh2' "" _newline


file write `hh2' "\begin{table}[h!]\footnotesize" _newline
file write `hh2' "  \centering" _newline
file write `hh2' "  \caption{Summary Statistics by Attrition Status}" _newline
file write `hh2' "\label{tab:sumstat_attrition}" _newline
file write `hh2' "\scalebox{1}{" _newline
file write `hh2' "\begin{threeparttable}" _newline

file write `hh2' "\begin{tabular}{lccccc}\toprule" _newline


file write `hh2' " Dependent Variable & Tracked & Attrition & Difference & N   \\\midrule\midrule" _newline

* DT score
file write `hh2' " DT score\textsuperscript{a} & " %04.3f (mean_tr_bl[1,1]) " & " %04.3f (mean_ct_bl[1,1]) " & " %04.3f (r2_DT_score_pre_std_mean[1,1]) `star_DT_score_pre_std' " & " (n_bl[1,1]) "  \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_tr_bl[1,1]) " ] & [ " %04.3f (sd_ct_bl[1,1]) " ] & ( " %04.3f (r2_DT_score_pre_std_pv[1,1]) " ) &   \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p1) " \} &   \\ " _newline

* RSES
file write `hh2' " RSES\textsuperscript{a} & " %04.3f (mean_tr_bl[1,2]) " & " %04.3f (mean_ct_bl[1,2]) " & " %04.3f (r2_rosen_pre_std_mean[1,1]) `star_rosen_pre_std' " & "  (n_bl[1,2]) " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_tr_bl[1,2]) " ] & [ " %04.3f (sd_ct_bl[1,2]) " ] & ( " %04.3f (r2_rosen_pre_std_pv[1,1]) " ) &   \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p2) " \} &   \\ " _newline

* CPCS
file write `hh2' " CPCS\textsuperscript{a} & " %04.3f (mean_tr_bl[1,3]) " & " %04.3f (mean_ct_bl[1,3]) " & " %04.3f (r2_cpcs_pre_std_mean[1,1]) `star_cpcs_pre_std' " & "  (n_bl[1,3]) " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_tr_bl[1,3]) " ] & [ " %04.3f (sd_ct_bl[1,3]) " ] & ( " %04.3f (r2_cpcs_pre_std_pv[1,1]) " ) &   \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p3) " \} &   \\ " _newline

* Female
file write `hh2' " Female & " %04.3f (mean_tr_bl[1,4]) " & " %04.3f (mean_ct_bl[1,4]) " & " %04.3f (r2_female_mean[1,1]) `star_female' " & "  (n_bl[1,4]) " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_tr_bl[1,4]) " ] & [ " %04.3f (sd_ct_bl[1,4]) " ] & ( " %04.3f (r2_female_pv[1,1]) " ) &   \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p4) " \} &   \\ " _newline

* NOTE(review): this row is labelled Grade 3 but reports grade_2, the
* indicator for grade == 2 -- confirm that the label is intended.
file write `hh2' " Grade 3 & " %04.3f (mean_tr_bl[1,5]) " & " %04.3f (mean_ct_bl[1,5]) " & " %04.3f (r2_grade_2_mean[1,1]) `star_grade_2' " & "  (n_bl[1,5]) " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_tr_bl[1,5]) " ] & [ " %04.3f (sd_ct_bl[1,5]) " ] & ( " %04.3f (r2_grade_2_pv[1,1]) " ) &   \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p5) " \} &   \\ " _newline

file write `hh2' " \\ "_newline



file write `hh2' "\midrule" _newline
file write `hh2' "\end{tabular}" _newline
file write `hh2' "\begin{tablenotes}" _newline
file write `hh2' "\item (a) Variables are standardized using the average and variance of the baseline sample in the March 2016 survey. " _newline
file write `hh2' "\item (b) Standard deviations are reported in square brackets." _newline
file write `hh2' "\item (c) Wild clustered bootstrap p-values are reported within parentheses. Clusters are schools at the baseline. There are 34 clusters. " _newline
file write `hh2' "\item (d) Romano-Wolf multiple hypothesis testing p-values are reported in curly brackets." _newline
file write `hh2' "\item (e) Statistical significance is indicated by stars based on the wild clustered bootstrap p-values reported in parentheses: $*$ denotes significance at the 10\% level, $**$ at the 5\% level, and $***$ at the 1\% level. " _newline
file write `hh2' "\end{tablenotes}" _newline
file write `hh2' "\end{threeparttable}" _newline
file write `hh2' "}" _newline
file write `hh2' "\label{tab:addlabel}%" _newline
file write `hh2' "\end{table}" _newline

file write `hh2' "" _newline
file write `hh2' "" _newline
file write `hh2' "" _newline
file write `hh2' "" _newline

file close `hh2'



* ---- Differences within the 2 x 2 attrition-by-treatment groups ---------
*   A = Tracked & Treated
*   B = Tracked & Control
*   C = Attrition & Treated
*   D = Attrition & Control
gen group_A = 1 if attrition == 0 & treatment == 1
gen group_B = 1 if attrition == 0 & treatment == 0
gen group_C = 1 if attrition == 1 & treatment == 1
gen group_D = 1 if attrition == 1 & treatment == 0

* Turn the 1/missing flags into 1/0 indicators (applies to every variable
* whose name starts with group_).
foreach g of varlist group_* {
	replace `g' = 0 if `g' == .
}

* ---- A vs C: tracked vs. attrited among treated students ----------------
* The full dataset is snapshotted, then restricted to groups A and C. The
* snapshot is still active when this section ends; the next section
* restores it before building its own subsample.
preserve
keep if group_A == 1 | group_C == 1
local dummy group_A
local arm treat
local outcome DT_score_pre_std rosen_pre_std cpcs_pre_std female grade_2

* Descriptives: <stat>_track_treat_bl for group A (dummy == 1) and
* <stat>_attrition_treat_bl for group C (dummy == 0).
foreach stat in n mean sd {
	tabstat `outcome' if `dummy' == 1, statistics(`stat') save
	matrix list r(StatTotal)
	matrix `stat'_track_`arm'_bl = r(StatTotal)

	tabstat `outcome' if `dummy' == 0, statistics(`stat') save
	matrix list r(StatTotal)
	matrix `stat'_attrition_`arm'_bl = r(StatTotal)
}

* Wild cluster bootstrap of each covariate on the group-A indicator.
* r(table) overwrites r2_<var>_temp; the arm-specific 1 x 2 matrices
* mean, se, pv and obs are created as missing and only mean and pv are filled.
* NOTE(review): rows 1 and 3 of r(table) are taken to be the coefficient
* and the p-value -- confirm against the wildbootstrap documentation.
foreach y of local outcome {

	wildbootstrap regress `y' `dummy', cluster(school_no) reps(1000)
	matrix r2_`y'_temp = r(table)

	foreach piece in mean se pv obs {
		matrix r2_`y'_`arm'_`piece' = J(1,2,.)
	}

	forvalues col = 1/2 {
		* coefficient
		matrix r2_`y'_`arm'_mean[1,`col'] = r2_`y'_temp[1,`col']
		* p-value
		matrix r2_`y'_`arm'_pv[1,`col'] = r2_`y'_temp[3,`col']
	}
}

* Significance stars: three for p <= 0.01, two for p <= 0.05, one for
* p <= 0.10, none otherwise (a missing p-value also yields none).
tempname pval
foreach y of local outcome {
	scalar `pval' = r2_`y'_`arm'_pv[1,1]

	if `pval' <= 0.01 {
		local star_`y'_`arm' %3s "***"
	}
	else if `pval' <= 0.05 {
		local star_`y'_`arm' %2s "**"
	}
	else if `pval' <= 0.10 {
		local star_`y'_`arm' %1s "*"
	}
	else {
		local star_`y'_`arm' ""
	}
}

* Romano-Wolf p-values, stored as rwolf_p1_treat ... rwolf_p5_treat.
rwolf `outcome', indepvar(`dummy') reps(1000) cluster(school_no) vce(cluster school_no)

local k = 0
foreach y of local outcome {
	local ++k
	scalar rwolf_p`k'_`arm' = e(rw_`y')
}


* ---- B vs D: tracked vs. attrited among control students ----------------
* Bring back the full dataset snapshotted earlier, snapshot it again, and
* restrict to groups B and D.
restore
preserve
keep if group_B == 1 | group_D == 1
local dummy group_B
local arm control
local outcome DT_score_pre_std rosen_pre_std cpcs_pre_std female grade_2

* Descriptives: <stat>_track_control_bl for group B (dummy == 1) and
* <stat>_attrition_control_bl for group D (dummy == 0).
foreach stat in n mean sd {
	tabstat `outcome' if `dummy' == 1, statistics(`stat') save
	matrix list r(StatTotal)
	matrix `stat'_track_`arm'_bl = r(StatTotal)

	tabstat `outcome' if `dummy' == 0, statistics(`stat') save
	matrix list r(StatTotal)
	matrix `stat'_attrition_`arm'_bl = r(StatTotal)
}

* Wild cluster bootstrap of each covariate on the group-B indicator.
* r(table) overwrites r2_<var>_temp; the arm-specific 1 x 2 matrices
* mean, se, pv and obs are created as missing and only mean and pv are filled.
* NOTE(review): rows 1 and 3 of r(table) are taken to be the coefficient
* and the p-value -- confirm against the wildbootstrap documentation.
foreach y of local outcome {
	wildbootstrap regress `y' `dummy', cluster(school_no) reps(1000)
	matrix r2_`y'_temp = r(table)

	foreach piece in mean se pv obs {
		matrix r2_`y'_`arm'_`piece' = J(1,2,.)
	}

	forvalues col = 1/2 {
		* coefficient
		matrix r2_`y'_`arm'_mean[1,`col'] = r2_`y'_temp[1,`col']
		* p-value
		matrix r2_`y'_`arm'_pv[1,`col'] = r2_`y'_temp[3,`col']
	}
}

* Significance stars: three for p <= 0.01, two for p <= 0.05, one for
* p <= 0.10, none otherwise (a missing p-value also yields none).
tempname pval
foreach y of local outcome {
	scalar `pval' = r2_`y'_`arm'_pv[1,1]

	if `pval' <= 0.01 {
		local star_`y'_`arm' %3s "***"
	}
	else if `pval' <= 0.05 {
		local star_`y'_`arm' %2s "**"
	}
	else if `pval' <= 0.10 {
		local star_`y'_`arm' %1s "*"
	}
	else {
		local star_`y'_`arm' ""
	}
}

* Romano-Wolf p-values, stored as rwolf_p1_control ... rwolf_p5_control.
rwolf `outcome', indepvar(`dummy') reps(1000) cluster(school_no) vce(cluster school_no)

local k = 0
foreach y of local outcome {
	local ++k
	scalar rwolf_p`k'_`arm' = e(rw_`y')
}


* ---- Table D3: Summary Statistics by Attrition Status and Treatment -----
* Writes the LaTeX table to summary_stat_baseline_attrition_treatment.tex.
* Columns 2-4 describe the treated arm (tracked mean, attrited mean,
* estimated difference) and columns 5-7 the control arm. Each covariate
* occupies three rows:
*   1. group means and the estimated difference with stars
*   2. group standard deviations in [ ] and the bootstrap p-value in ( )
*   3. the Romano-Wolf p-value in { }
* The star_<var>_<arm> locals expand to a format directive plus a quoted
* string and are therefore placed unquoted in the file write argument list.
*
* Changes relative to the previous version of this section:
*   - the RSES row printed the DT-score control-arm estimate and stars in
*     its control Difference cell; it now prints the rosen_pre_std ones;
*   - the superscript marker on row labels is written with \textsuperscript
*     because a bare ^ in text mode raises a LaTeX "Missing $ inserted" error.
tempname hh2
file open `hh2' using "$path_output/summary_stat_baseline_attrition_treatment.tex", write replace
file write `hh2' "" _newline
file write `hh2' "% Author: Kazuma Takakura" _newline
file write `hh2' "% Date: `c(current_date)'" _newline
file write `hh2' "% Time: `c(current_time)'" _newline
file write `hh2' "" _newline


file write `hh2' "\begin{table}[h!]\footnotesize" _newline
file write `hh2' "  \centering" _newline
file write `hh2' "  \caption{Summary Statistics by Attrition Status and Treatment Status}" _newline
file write `hh2' "\label{tab:difference_attrition_treatment}" _newline
file write `hh2' "\scalebox{0.9}{" _newline
file write `hh2' "\begin{threeparttable}" _newline

file write `hh2' "\begin{tabular}{lccccccc}\toprule" _newline


file write `hh2' " Dependent Variable & Tracked-Treated & Attrition-Treated  & Difference  & Tracked-Control & Attrition-Control & Difference   \\\midrule\midrule" _newline

* DT score
file write `hh2' " DT score\textsuperscript{a} & " %04.3f (mean_track_treat_bl[1,1]) " & " %04.3f (mean_attrition_treat_bl[1,1]) " & " %04.3f (r2_DT_score_pre_std_treat_mean[1,1]) `star_DT_score_pre_std_treat' " & " %04.3f (mean_track_control_bl[1,1]) " & " %04.3f (mean_attrition_control_bl[1,1]) " & " %04.3f (r2_DT_score_pre_std_control_mean[1,1]) `star_DT_score_pre_std_control' "  \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_track_treat_bl[1,1]) " ] & [ " %04.3f (sd_attrition_treat_bl[1,1]) " ] & ( " %04.3f (r2_DT_score_pre_std_treat_pv[1,1]) " )  & [ " %04.3f (sd_track_control_bl[1,1]) " ] & [ " %04.3f (sd_attrition_control_bl[1,1]) " ] & ( " %04.3f (r2_DT_score_pre_std_control_pv[1,1]) " ) \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p1_treat) " \} &  &   & \{ " %04.3f (rwolf_p1_control) " \}  \\ " _newline

* RSES (control-arm difference now taken from the rosen_pre_std results)
file write `hh2' " RSES\textsuperscript{a} & " %04.3f (mean_track_treat_bl[1,2]) " & " %04.3f (mean_attrition_treat_bl[1,2]) " & " %04.3f (r2_rosen_pre_std_treat_mean[1,1]) `star_rosen_pre_std_treat' " & " %04.3f (mean_track_control_bl[1,2]) " & " %04.3f (mean_attrition_control_bl[1,2]) " & " %04.3f (r2_rosen_pre_std_control_mean[1,1]) `star_rosen_pre_std_control' " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_track_treat_bl[1,2]) " ] & [ " %04.3f (sd_attrition_treat_bl[1,2]) " ] & ( " %04.3f (r2_rosen_pre_std_treat_pv[1,1]) " )   & [ " %04.3f (sd_track_control_bl[1,2]) " ] & [ " %04.3f (sd_attrition_control_bl[1,2]) " ] & ( " %04.3f (r2_rosen_pre_std_control_pv[1,1]) " )  \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p2_treat) " \} &  &   & \{ " %04.3f (rwolf_p2_control) " \}  \\ " _newline

* CPCS
file write `hh2' " CPCS\textsuperscript{a} & " %04.3f (mean_track_treat_bl[1,3]) " & " %04.3f (mean_attrition_treat_bl[1,3]) " & " %04.3f (r2_cpcs_pre_std_treat_mean[1,1]) `star_cpcs_pre_std_treat' " & " %04.3f (mean_track_control_bl[1,3]) " & " %04.3f (mean_attrition_control_bl[1,3]) " & " %04.3f (r2_cpcs_pre_std_control_mean[1,1]) `star_cpcs_pre_std_control' " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_track_treat_bl[1,3]) " ] & [ " %04.3f (sd_attrition_treat_bl[1,3]) " ] & ( " %04.3f (r2_cpcs_pre_std_treat_pv[1,1]) " )  & [ " %04.3f (sd_track_control_bl[1,3]) " ] & [ " %04.3f (sd_attrition_control_bl[1,3]) " ] & ( " %04.3f (r2_cpcs_pre_std_control_pv[1,1]) " )  \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p3_treat) " \} &   &   & \{ " %04.3f (rwolf_p3_control) " \} \\ " _newline

* Female
file write `hh2' " Female & " %04.3f (mean_track_treat_bl[1,4]) " & " %04.3f (mean_attrition_treat_bl[1,4]) " & " %04.3f (r2_female_treat_mean[1,1]) `star_female_treat' " & " %04.3f (mean_track_control_bl[1,4]) " & " %04.3f (mean_attrition_control_bl[1,4]) " & " %04.3f (r2_female_control_mean[1,1]) `star_female_control' " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_track_treat_bl[1,4]) " ] & [ " %04.3f (sd_attrition_treat_bl[1,4]) " ] & ( " %04.3f (r2_female_treat_pv[1,1]) " )   & [ " %04.3f (sd_track_control_bl[1,4]) " ] & [ " %04.3f (sd_attrition_control_bl[1,4]) " ] & ( " %04.3f (r2_female_control_pv[1,1]) " )   \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p4_treat) " \} &  &   & \{ " %04.3f (rwolf_p4_control) " \}  \\ " _newline

* NOTE(review): this row is labelled Grade 3 but reports grade_2, the
* indicator for grade == 2 -- confirm that the label is intended.
file write `hh2' " Grade 3 & " %04.3f (mean_track_treat_bl[1,5]) " & " %04.3f (mean_attrition_treat_bl[1,5]) " & " %04.3f (r2_grade_2_treat_mean[1,1]) `star_grade_2_treat' " & " %04.3f (mean_track_control_bl[1,5]) " & " %04.3f (mean_attrition_control_bl[1,5]) " & " %04.3f (r2_grade_2_control_mean[1,1]) `star_grade_2_control' " \\ " _newline
file write `hh2' "      & [ " %04.3f (sd_track_treat_bl[1,5]) " ] & [ " %04.3f (sd_attrition_treat_bl[1,5]) " ] & ( " %04.3f (r2_grade_2_treat_pv[1,1]) " )   & [ " %04.3f (sd_track_control_bl[1,5]) " ] & [ " %04.3f (sd_attrition_control_bl[1,5]) " ] & ( " %04.3f (r2_grade_2_control_pv[1,1]) " )   \\ " _newline
file write `hh2' "      &   &   & \{ " %04.3f (rwolf_p5_treat) " \} &   &   & \{ " %04.3f (rwolf_p5_control) " \}  \\ " _newline

file write `hh2' " \\ "_newline


file write `hh2' "\midrule" _newline
file write `hh2' "\end{tabular}" _newline
file write `hh2' "\begin{tablenotes}" _newline
file write `hh2' "\item (a) Variables are standardized using the average and variance of the whole baseline sample in the March 2016 survey. " _newline
file write `hh2' "\item (b) Standard deviations are reported in square brackets." _newline
file write `hh2' "\item (c) Wild clustered bootstrap p-values are reported within parentheses. Clusters are schools at the baseline. There are 34 clusters. " _newline
file write `hh2' "\item (d) Romano-Wolf multiple hypothesis testing p-values are reported in curly brackets." _newline
file write `hh2' "\item (e) $^*$ Significant at 10\% level; $^{**}$ significant at 5\% level; $^{***}$ significant at 1\% level. " _newline
file write `hh2' "\end{tablenotes}" _newline
file write `hh2' "\end{threeparttable}" _newline
file write `hh2' "}" _newline
file write `hh2' "\label{tab:addlabel}%" _newline
file write `hh2' "\end{table}" _newline

file write `hh2' "" _newline
file write `hh2' "" _newline
file write `hh2' "" _newline
file write `hh2' "" _newline

file close `hh2'

